# Replication file for "A Friend Like Me: The Effect of IO Membership on State Preferences"
# Authors: Naomi Egel and Nina Obermeier
# R version 4.1.2 (2021-11-01)


# Clean working environment
rm(list = ls())

# Load packages
library(plm)
library(lmtest)
library(tidyr)
library(dplyr)
library(tidyverse)
library(broom)
library(cem)
library(MatchIt)
library(cowplot)
library(ggplot2)


### Table in the main manuscript

## Table 1

# Load Eastern accession data
setwd()

eastern_data <- read.csv("A_friend_like_me_eastern.csv")

# Create panel data
eastern_panel <- pdata.frame(eastern_data, index=c("dyad","year"), drop.index=FALSE, row.names=TRUE)

# Make year numeric
eastern_panel$year <- as.numeric(as.character(eastern_panel$year))

# Table 1, Column 1
model_1 <- plm(idealsimilarity ~ treatment * post, data = eastern_panel, model = "within")

summary(model_1)

# Table 1, Column 2
model_2 <- plm(idealsimilarity ~ treatment * post + other_joint_IO_memberships + relative_economic_size + 
                                relative_economic_development + politydifference + geographic_distance, 
               data = eastern_panel, model = "within")

summary(model_2)

# Table 1, Column 3
model_3 <- plm(idealsimilarity ~ treatment_levels * post, data = eastern_panel, model = "within")

summary(model_3)

#Table 1, Column 4
model_4 <- plm(idealsimilarity ~ treatment * post + year * dyad, data = eastern_panel, model = "within")

summary(model_4)


### Tables and figures in the online appendix

## Table C1

# Table C1, Column 1
sapply(eastern_data, mean, na.rm=TRUE) 

# Table C1, Column 2
sapply(eastern_data, sd, na.rm=TRUE)

# Table C1, Column 3
sapply(eastern_data, min, na.rm=TRUE)

# Table C1, Column 4
sapply(eastern_data, max, na.rm=TRUE)

## Table C2

# Load data including EU-15 member states
eastern_eu15_data <- read.csv("A_friend_like_me_eastern_EU15.csv")

# Table C2, Column 1
sapply(eastern_eu15_data, mean, na.rm=TRUE) 

# Table C2, Column 2
sapply(eastern_eu15_data, sd, na.rm=TRUE) 

# Table C2, Column 3
sapply(eastern_eu15_data, min, na.rm=TRUE) 

# Table C2, Column 4
sapply(eastern_eu15_data, max, na.rm=TRUE) 


## Table C3

# Load Northern accession data
northern_data <- read.csv("A_friend_like_me_northern.csv")

# Table C3, Column 1
sapply(northern_data, mean, na.rm=TRUE) 

# Table C3, Column 2
sapply(northern_data, sd, na.rm=TRUE) 

# Table C3, Column 3
sapply(northern_data, min, na.rm=TRUE) 

# Table C4, Column 4
sapply(northern_data, max, na.rm=TRUE) 


## Table D1 
model_2 <- plm(idealsimilarity ~ treatment * post + other_joint_IO_memberships + relative_economic_size + 
                 relative_economic_development + politydifference + geographic_distance, 
               data = eastern_panel, model = "within")

summary(model_2)


## Table D2

# Run model
model_1 <- plm(idealsimilarity ~ treatment * post, data = eastern_panel, model = "within")

summary(model_1)

# Cluster standard errors
coeftest(model_1, vcov=vcovHC(model_1, type = "HC0", cluster = "group"))


## Figure E1

# Left panel
unmatched_trend_model <- plm(idealsimilarity ~ year90 * treatment + year91 * treatment + year92 * treatment + 
                      year93 * treatment + year94 * treatment + year95 * treatment + year96 * treatment + 
                      year97 * treatment + year98 * treatment + year99 * treatment + year00 * treatment + 
                      year01 * treatment + year02 * treatment + year04 * treatment + year05 * treatment +
                      year06 * treatment + year07 * treatment + year08 * treatment + year09 * treatment + 
                      year10 * treatment + year11 * treatment + year12 * treatment + year13 * treatment + 
                      year14 * treatment, data = eastern_panel, model = "within")

multiplier <- qnorm(1 - 0.05 / 2)

unmatched_trend_model <- tidy(unmatched_trend_model) %>%
  mutate(ymin = estimate - (multiplier * std.error),
         ymax = estimate + (multiplier * std.error))

unmatched_trend_model <- filter(unmatched_trend_model, term == "year90:treatment" | term == "treatment:year91" | 
                                  term == "treatment:year92" | term == "treatment:year93" | 
                                  term == "treatment:year94" | term == "treatment:year95" | 
                                  term == "treatment:year96" | term == "treatment:year97" | 
                                  term == "treatment:year98" | term == "treatment:year99" | 
                                  term == "treatment:year00" | term == "treatment:year01" | 
                                  term == "treatment:year02" | term == "treatment:year04" | 
                                  term == "treatment:year05" | term == "treatment:year06" | 
                                  term == "treatment:year07" | term == "treatment:year08" | 
                                  term == "treatment:year09" | term == "treatment:year10" | 
                                  term == "treatment:year11" | term == "treatment:year12" | 
                                  term == "treatment:year13" | term == "treatment:year14" )

var.names <- character(length(unmatched_trend_model))

unmatched_trend_model$var.names <- c("1990", "1991", "1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999",
                                     "2000", "2001", "2002", "2004", "2005", "2006", "2007", "2008", "2009", "2010", 
                                     "2011", "2012", "2013", "2014")

unmatched_trend_model$var.names <- as.numeric(unmatched_trend_model$var.names)

unmatched_trend_plot <- ggplot(unmatched_trend_model, aes(x=var.names, y=estimate)) + 
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_vline(xintercept = 2003.5, colour = "black", linetype="longdash") +
  geom_pointrange(aes(ymin=ymin, ymax=ymax),  lwd = 1/2, shape = 21, fill = "BLACK") +
  labs(x="Year", y="Estimated treatment effect") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) + 
  ylim(-0.5,0.8) + xlim(1990,2015)

# Matching on relative economic size, relative economic development, domestic political difference and distance

matching_vars <- c("idealsimilarity", "geographic_distance","politydifference", "relative_economic_size", 
             "relative_economic_development", "treatment", "year90", "year91", "year92", "year93", "year94", "year95",
             "year96", "year97", "year98", "year99", "year00", "year01", "year02", "year03", "year04", "year05", 
             "year06", "year07", "year08", "year09", "year10", "year11", "year12", "year13", "year14", "post", "dyad", 
             "year", "idealsimilarity_eu_15", "trade_eu_15", "other_joint_IO_memberships")

matching_data <- eastern_data[matching_vars]

matching_data <- na.omit(matching_data)

matching_data <- as.data.frame(matching_data)

matching_model <- matchit(treatment ~ geographic_distance + relative_economic_size + relative_economic_development + 
                   politydifference, data = matching_data, method = "cem")

matched_data <- match.data(matching_model)

matched_panel <- pdata.frame(matched_data, index=c("dyad","year"), drop.index = FALSE, row.names=TRUE)

# Right panel

matched_trend_model <- plm(idealsimilarity ~ treatment * year90 + treatment * year91 * treatment + 
                             year92 * treatment + year93 * treatment + year94 * treatment + year95 * treatment + 
                             year96 * treatment + year97 * treatment + year98 * treatment + year99 * treatment + 
                             year00 * treatment + year01 * treatment + year02 * treatment + year04 * treatment + 
                             year05 * treatment + year06 * treatment + year07 * treatment + year08 * treatment + 
                             year09 * treatment + year10 * treatment + year11 * treatment + year12 * treatment + 
                             year13 * treatment + year14 * treatment, data = matched_panel, model = "within", 
                           weights = weights)

matched_trend_model <- tidy(matched_trend_model) %>%
  mutate(ymin = estimate - (multiplier * std.error),
         ymax = estimate + (multiplier * std.error))


matched_trend_model <- filter(matched_trend_model, term == "treatment:year90" | term == "treatment:year91" | 
                                term == "treatment:year92" | term == "treatment:year93" | 
                                term == "treatment:year94" | term == "treatment:year95" | 
                                term == "treatment:year96" | term == "treatment:year97" | 
                                term == "treatment:year98" | term == "treatment:year99" | 
                                term == "treatment:year00" | term == "treatment:year01" |
                                term == "treatment:year02" | term == "treatment:year04" | 
                                term == "treatment:year05" | term == "treatment:year06" | 
                                term == "treatment:year07" | term == "treatment:year08" | 
                                term == "treatment:year09" | term == "treatment:year10" | 
                                term == "treatment:year11" | term == "treatment:year12" | 
                                term == "treatment:year13" | term == "treatment:year14" )

matched_trend_model$var.names <- c("1990", "1991", "1992", "1993", "1994", "1995", "1996", "1997", "1998", "1999", 
                                   "2000", "2001", "2002", "2004", "2005", "2006", "2007", "2008", "2009", "2010", 
                                   "2011", "2012", "2013", "2014")

matched_trend_model$var.names <- as.numeric(matched_trend_model$var.names)

matched_trend_plot <- ggplot(matched_trend_model, aes(x=var.names, y=estimate)) + 
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_vline(xintercept = 2003.5, colour = "black", linetype="longdash") +
  geom_pointrange(aes(ymin=ymin, ymax=ymax),  lwd = 1/2, shape = 21, fill = "BLACK") +
  labs(x="Year", y="Estimated treatment effect") +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) + 
  ylim(-0.5,0.8) + xlim(1990,2015)

# Full Figure E1

plot_grid(unmatched_trend_plot, matched_trend_plot)


## Table E1

matched_model <- plm(idealsimilarity ~ treatment * post, data = matched_panel, model = "within", 
              weights = weights)

summary(matched_model)


## Table F1

# Load data including EU-15 member states
eastern_eu15_data <- read.csv("A_friend_like_me_eastern_EU15.csv")

# Create panel data
eastern_eu15_panel <- pdata.frame(eastern_eu15_data, index=c("dyad","year"), drop.index=FALSE, row.names=TRUE)

# Run model
eastern_eu15_model <- plm(idealsimilarity ~ joint_IO_membership * new_EU_member * post + 
                             relative_economic_development + relative_economic_size + politydifference + 
                             geographic_distance, data = eastern_eu15_panel, model = "within")

summary(eastern_eu15_model)



## Table G1

# Load Northern accession data
northern_data <- read.csv("A_friend_like_me_northern.csv")

# Create panel data
northern_panel <- pdata.frame(northern_data, index=c("dyad","year"), drop.index=FALSE, row.names=TRUE)

# Run model
northern_model <- plm(idealsimilarity ~ treatment * post, data = northern_panel, model="within")

summary(northern_model)




